rm(list=ls())

knitr::opts_knit$set(root.dir="/mnt/vmfileshare/ClimateData/")

library(ggplot2)
library(terra)
library(tmap) #pretty maps
library(RColorBrewer)
library(tidyverse)
library(kableExtra)

0. About

This is an example notebook for the assessment of bias corrected data, using output from the R ‘qmap’ package for the city of Glasgow and the variable ‘tasmax’.

Input data

This script requires the following data:

  • ‘obs.cal’ - observation (HADs data) for the calibration period - the dataset used as the reference dataset in the bias correction
  • ‘obs.val’ - as above, for the validation period
  • ‘cpm.cal.raw’ - the raw (uncorrected) data for the calibration period
  • ‘cpm.cal.adj’ - the adjusted (bias-corrected) data for the calibration period
  • ‘cpm.val.raw’ - the raw (uncorrected) data for the valibration period
  • ‘cpm.val.adj’ - the adjusted (bias-corrected) data for the valibration period
  • ‘cpm.proj.raw’ - the raw (uncorrected) data for the future/projected period (optional)
  • ‘cpm.proj.radj’ - the adjusted (bias-corrected) data for the future/projected period (optional)

The data is required in raster format and dataframe formats

Calibration vs Validation dates

The calibration period runs between 01-12-1980 to the day prior to 01-12-2010 The validation period runs between 01-12-2010 to the day prior to 01-12-2020

Ruth to finish cleaning up this bit (it won’t run at the moment)

Fig. Calibration period - seasonal mean

Summer only

Fig. Calibration period - seasonal max

Fig. Calibration period - Summer only

2. Bias Correction Assessment: Metrics

Using the validation data set for this

val.dfs <- c(list(obs.val.df), cpm.val.raw.df.L, cpm.val.adj.df.L)

#Convert dfs to a vector
val.dfs.v <- lapply(val.dfs, function(d){
  #Convert to single vector
  unlist(as.vector(d))})

val.dfs.v.df <- as.data.frame(val.dfs.v)
names(val.dfs.v.df) <- c("obs.val", paste0("Run", rep(runs, 2), "_", var, ".",rep(c("raw", "adj", 4)))) # Names for easy reference

2a. Descriptive statistics

descriptives <- apply(val.dfs.v.df, 2, function(x){ 
  per <- data.frame(as.list(quantile(x, probs=c(0.1, 0.9))))
  data.frame(mean=mean(x), sd=sd(x), min = min(x), per10th=per$X10.,per90th=per$X90., max = max(x))
})

descriptives <- descriptives %>% reduce(rbind)
row.names(descriptives) <- names(val.dfs.v.df)
d <- t(descriptives)

d %>% 
  kable(booktabs = T) %>%
  kable_styling() %>%
  row_spec(grep(".bc.",row.names(d)), background = "lightgrey")
obs.val Run05_tasmax.raw Run06_tasmax.adj Run07_tasmax.4 Run08_tasmax.raw Run05_tasmax.adj Run06_tasmax.4 Run07_tasmax.raw Run08_tasmax.adj
mean 12.990120 12.460644 12.047759 12.233846 12.764617 13.551728 13.128147 13.4808839 13.199155
sd 5.375065 5.987282 6.000949 5.587015 5.852800 5.557214 5.692209 5.3415785 5.598392
min -5.222516 -3.289649 -2.942725 -1.848730 -2.183447 -2.261268 -1.499793 -0.1865713 -4.036536
per10th 6.005837 4.394897 4.245508 5.023584 5.110010 6.493001 5.988253 6.6283767 5.990010
per90th 19.912904 20.198414 19.897852 19.655664 20.234034 20.832330 20.699387 20.5333003 20.171180
max 31.605967 32.398094 31.079248 32.790184 35.011864 34.132040 31.410919 33.5803528 34.205112

Fig.Density plot of validation period

Note - need to add back in some facetting to this fig

m <- reshape2::melt(val.dfs.v.df)

ggplot(m, aes(value, fill=variable, colour=variable)) + 
  geom_density(alpha = 0.3, position="identity") + 
  theme_minimal() +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1")

2b. Model fit statistics

Using the following to assess overall fit:

  • R-squared (rsq)
  • Root Square Mean Error (RMSE)
  • Nash-Sutcliffe Efficiency (NSE): Magnitude of residual variance compared to measured data variance, ranges -∞ to 1, 1 = perfect match to observations
  • Percent bias (PBIAS): The optimal value of PBIAS is 0.0, with low-magnitude values indicating accurate model simulation. Positive values indicate overestimation bias, whereas negative values indicate model underestimation bias.
actual <- val.dfs.v.df$obs.val

rsq <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
  cor(actual, x)^2
})
rmse <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
  sqrt(mean((actual - x)^2))
})
pbias <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
  hydroGOF::pbias(x, actual)
})
nse <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
  hydroGOF::NSE(x, actual)
})

Highlighting the bias corrected statistics

k <- cbind(rsq, rmse, pbias, nse)
k %>% 
  kable(booktabs = T) %>%
  kable_styling() %>%
  row_spec(grep(".bc.",row.names(k)), background = "lightgrey")
rsq rmse pbias nse
Run05_tasmax.raw 0.5555510 4.128585 -4.1 0.4100210
Run06_tasmax.adj 0.5535077 4.218502 -7.3 0.3840428
Run07_tasmax.4 0.5051351 4.241554 -5.8 0.3772924
Run08_tasmax.raw 0.5331874 4.153865 -1.7 0.4027737
Run05_tasmax.adj 0.5464456 3.990959 4.3 0.4486993
Run06_tasmax.4 0.5580189 3.949778 1.1 0.4600178
Run07_tasmax.raw 0.5082040 4.090093 3.8 0.4209707
Run08_tasmax.adj 0.5298693 4.058097 1.6 0.4299947

3. Bias Correction Assessment: Metric specific - tasmax

3b Days above 30 degrees

(Not considered consecutively here)

Number of heatwaves per annum

(to be added)

For future work

The number of quantiles selected will effect the efficacy of the bias correction: lots of options therefore with this specific method